knitr::opts_chunk$set(echo = TRUE)

Introduction

# Load Packages ----
library(tidyverse)
library(plotly)
library(broom)
library(knitr)
library(kableExtra)


# Load Data ----
afl <- read.csv("/home/gthornton1999/VU/3. Spatiotemporal/Assignment 3/Data/Assessment3_SpatiotemporalDataset2.csv")



# Wrangle ----
#column names
colnames(afl)
##  [1] "SampleNumber" "Time_Seconds" "Time_Minutes" "Latitude"     "Longitude"   
##  [6] "Velocity"     "Acceleration" "Game"         "Action"       "Symbol"      
## [11] "Location"     "Quarter"
# how many unique actions are there?
unique(afl$Action)
##  [1] NA                         "Handball Effective"      
##  [3] "Handball Clanger"         "Kick Short"              
##  [5] "Handball Ineffective"     "Kick Ineffective"        
##  [7] "Kick Backwards"           "Kick Inside 50"          
##  [9] "Tackle"                   "Knock On Effective"      
## [11] "Spoil"                    "Ground Kick Ineffective" 
## [13] "Kick In Short"            "Kick Clanger"            
## [15] "Kick Long"                "1-on-1 Contest Defender" 
## [17] "Smotherer After Disposal" "Spoil Gaining"           
## [19] "Kick Long To Advantage"
# what type are eahc variable
str(afl)
## 'data.frame':    336018 obs. of  12 variables:
##  $ SampleNumber: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Time_Seconds: num  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
##  $ Time_Minutes: num  0.00167 0.00333 0.005 0.00667 0.00833 ...
##  $ Latitude    : num  -37.8 -37.8 -37.8 -37.8 -37.8 ...
##  $ Longitude   : num  145 145 145 145 145 ...
##  $ Velocity    : num  0.17 0.272 0.517 0.857 1.097 ...
##  $ Acceleration: num  0.318 0.491 0.981 1.586 1.792 ...
##  $ Game        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Action      : chr  NA NA NA NA ...
##  $ Symbol      : chr  NA NA NA NA ...
##  $ Location    : chr  NA NA NA NA ...
##  $ Quarter     : int  NA NA NA NA NA NA NA NA NA NA ...
# Define Fatigue using "Quarter" as the independent variable. 
# bin the quarters into fatigue levels
# assuming relatively linear increase in fatigue 
afl$Fatigue_Level <- case_when(
  afl$Quarter == 1 ~ "Low",        # Quarter 1: Early in the game
  afl$Quarter == 2 ~ "Low",        # Quarter 2: Still relatively fresh
  afl$Quarter == 3 ~ "Moderate",   # Quarter 3: Fatigue starting to accumulate
  afl$Quarter == 4 ~ "High"        # Quarter 4: High fatigue
)

# Define action variables into "effective" and "ineffective"
# Effective actions
effective_actions <- afl %>% 
  filter(Action %in% c("Handball Effective", "Kick Long To Advantage", 
                       "Knock On Effective", "Spoil Gaining", 
                       "Kick Inside 50", "Kick Long", 
                       "Smotherer After Disposal"))

# Ineffective or Clanger actions
ineffective_actions <- afl %>% 
  filter(Action %in% c("Handball Clanger", "Kick Clanger", 
                       "Handball Ineffective", "Kick Ineffective", 
                       "Ground Kick Ineffective"))

Regression Analysis

Does fatigue have a impact on skill execution?

# Regression ----
# Does fatigue have a impact on skill execution? 

# Step 1: Create a binary skill execution variable on both skill df's
effective_actions <- effective_actions %>%
  mutate(EffectiveBinary = 1)

ineffective_actions <- ineffective_actions %>%
  mutate(EffectiveBinary = 0)

# Step 2: Combine skill df's into 1 df
afl_skill <- bind_rows(effective_actions, ineffective_actions)

# Step 3: Run a logistic regression
# will analyse whether fatigue level is significantly associated with the probability of executing an effective action
skillvfatigue_model <- glm(EffectiveBinary ~ Fatigue_Level, family = "binomial", data = afl_skill)
# Summarise regression
summary(skillvfatigue_model)
## 
## Call:
## glm(formula = EffectiveBinary ~ Fatigue_Level, family = "binomial", 
##     data = afl_skill)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4528  -1.3856   0.9250   0.9826   1.0203  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)            0.38137    0.17336   2.200   0.0278 *
## Fatigue_LevelLow       0.09589    0.21397   0.448   0.6540  
## Fatigue_LevelModerate  0.24618    0.23324   1.055   0.2912  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 779.51  on 587  degrees of freedom
## Residual deviance: 778.34  on 585  degrees of freedom
## AIC: 784.34
## 
## Number of Fisher Scoring iterations: 4
# make neat table using broom package
regression_table <- tidy(skillvfatigue_model)
# add odds ratios
regression_table <- regression_table %>% 
  mutate(odds_ratio = exp(estimate))

# make it neat with kable
regression_table %>%
  select(term, estimate, std.error, odds_ratio, p.value) %>%
  mutate(across(where(is.numeric), ~ round(., 3))) %>%
  kable(col.names = c("Term", "Estimate", "Std. Error", "Odds Ratio", "P-Value"),
        caption = "Logistic Regression: Effect of Fatigue on Skill Execution") %>%
  kable_styling(bootstrap_options = c("striped", "condensed"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE)
Logistic Regression: Effect of Fatigue on Skill Execution
Term Estimate Std. Error Odds Ratio P-Value
(Intercept) 0.381 0.173 1.464 0.028
Fatigue_LevelLow 0.096 0.214 1.101 0.654
Fatigue_LevelModerate 0.246 0.233 1.279 0.291
# Tidy the model output
model_summary <- tidy(skillvfatigue_model) %>%
  mutate(
    Odds_Ratio = exp(estimate),  # convert coefficients to odds ratios
    estimate = round(estimate, 3),
    std.error = round(std.error, 3),
    statistic = round(statistic, 3),
    p.value = round(p.value, 3),
    Odds_Ratio = round(Odds_Ratio, 3)
  )

# Rename columns for clarity
colnames(model_summary) <- c("Term", "Coefficient", "Std. Error", "Z value", "P value", "Odds Ratio")

# Create a neat table
kable(model_summary, caption = "Logistic Regression Summary: Effect of Fatigue on Skill Execution") %>%
  kable_styling(full_width = FALSE, position = "center", bootstrap_options = c("striped", "hover"))
Logistic Regression Summary: Effect of Fatigue on Skill Execution
Term Coefficient Std. Error Z value P value Odds Ratio
(Intercept) 0.381 0.173 2.200 0.028 1.464
Fatigue_LevelLow 0.096 0.214 0.448 0.654 1.101
Fatigue_LevelModerate 0.246 0.233 1.055 0.291 1.279
# Step 4: Interpret summary statistics
# Performance under high fatigue (intercept) is statistically significant (P value = 0.028)
# The coefficients of Low and Moderate fatigue levels are positive = the player performs slightly better under less fatigue
# However, the P values are greater than 0.05 = the differences in skill execution and fatigue levels are not statistically significant

# Step 5: Visualise the effect
ggplot(afl_skill, aes(x = Fatigue_Level, fill = as.factor(EffectiveBinary))) +
  geom_bar(position = "fill") +
  labs(x = "Fatigue Level", y = "Proportion of Actions", title = "Skill Execution by Fatigue Level") +
  scale_fill_manual(values = c("0" = "orange", "1" = "purple"),
                    labels = c("Ineffective", "Effective"),
                    name = "Skill Execution") +
  theme_minimal()

# Shows the most ineffective skills occur when High Fatigue
# The least is under moderate fatigue - why??

# Step 6: Convert coefficients to Odds Ratios
# tells us how the odds of executing an effective action change as fatigue level increases
exp(coef(skillvfatigue_model))
##           (Intercept)      Fatigue_LevelLow Fatigue_LevelModerate 
##              1.464286              1.100639              1.279133
# intercept = 1.46 -> baseline of odds of an effective action when the fatigue level is high
# Fatigue Level Low = 1.10 -> odds of an effective action are 1.10x higher in Low Fatigue than in High
# Fatigue Level Moderate = 1.28 -> odds of an effective skill action are 1.28x higher in Moderate than in High Fatigue

# Step 7: Overall interpretation

# As fatigue increases, the odds of performing an effective action decreases.
# players are more likely to perform well under low or moderate fatigue than high
# 

Spatial Distribution of Errors

Using Fatigue levels to assess where skill errors commonly occur

# Spatial Distribution of Errors ----
# Using Fatigue levels to assess where skill errors commonly occur

# Step 1: Classify Actions 
# already did this when creating the "ineffective_actions" data frame

# Step 2: Group each individual ineffective action and remove multiple data points for the same action until there is one. 
ineffective_actions_single <- ineffective_actions %>%
  group_by(Action, Location, Quarter, Game) %>%
  slice(1) %>%  # Take the first time stamp for that action group
  ungroup()


# Step 3: Plot bar chart using Location variables (field zones)
ggplot(ineffective_actions_single, aes(x = Location, fill = Fatigue_Level)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Count of Ineffective Actions by Field Location and Fatigue Level",
    x = "Field Zone",
    y = "Count of Clangers",
    fill = "Fatigue Level"
  ) +
  theme_minimal()

Skill Outcome by Field Location and Fatigue Level

# Comparing effective vs. ineffective actions across field locations and fatigue levels. 

# Step 1: Creating df for individual effective actions (same method as Step 2)
effective_actions_single <- effective_actions %>%
  group_by(Action, Location, Quarter, Game) %>%
  slice(1) %>%  # Take the first time stamp for that action group
  ungroup()

# Step 2: Create "Skill_Outcome" variable in each df
ineffective_actions_single <- ineffective_actions_single %>%
  mutate(Skill_Outcome = "Ineffective")
effective_actions_single <- effective_actions_single %>% 
  mutate(Skill_Outcome = "Effective")

# Step 3: Combine the 2 df
combined_actions_single <- bind_rows(effective_actions_single, ineffective_actions_single)

# Step 4: Create a data frame for the counts of Skill Outcomes
counts_df <- combined_actions_single %>%
  group_by(Location, Skill_Outcome, Fatigue_Level) %>%
  summarise(Count = n(), .groups = "drop")

# Step 5: Create ggplot value
spatial_interactive <- ggplot(counts_df, aes(x = Location, y = Count, fill = Skill_Outcome,
                           text = paste0(
                             "Location: ", Location, "<br>",
                             "Skill Outcome: ", Skill_Outcome, "<br>",
                             "Fatigue Level: ", Fatigue_Level, "<br>",
                             "Count: ", Count
                           ))) +
  geom_col(position = "dodge") +
  facet_wrap(~ Fatigue_Level) +
  labs(
    title = "Skill Outcome by Field Location and Fatigue Level",
    x = " ",
    y = "Action Count",
    fill = "Skill Outcome"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Step 6: Convert to interactive plot
ggplotly(spatial_interactive, tooltip = "text")